Data Sources:

Tables:

https://data.hdx.rwlabs.org/dataset/nepal-earthquake-severity-index https://data.hdx.rwlabs.org/dataset/io (sheet 3) https://data.hdx.rwlabs.org/dataset/population-movements-after-the-nepal-earthquake-v-3-up-to-11th-june-2015

Relevant materials and data cna be found at:

https://www.dropbox.com/sh/tb9854hzcof7x23/AACEDTGk8EmYQ6r4ukSFLBspa?dl = 0

in the folder /Displacement Network Model/

#
# LOAD PACKAGES
library(plyr)
library(dplyr)
library(igraph)
library(RColorBrewer)
#
#
# SET FILE SOURCE PATH
DIR <- "/Users/ggospodinov/Desktop/UN_OCHA_project/data/"
#
#
# DEFINE FUNCTIONS
#
#
#
# FUNCTION TO DISPLAY RELATIVE PERCENTAGES FOR HSITOGRAM COLUMNS
histP <- function(x,breaks, ...) {
  H <- hist(x, plot = FALSE, breaks=breaks)
  H$density <- with(H, 100 * density* diff(breaks)[1])
  labs <- paste(round(H$density), "%", sep="")
  plot(H, freq = FALSE, labels = labs, ylim=c(0, 1.08*max(H$density)),...)
}


# FUNCTION TO DISPLAY RELATIVE PERCENTAGE VALUES GREATER THAN 0
histP1 <- function(x,breaks, ...) {
  H <- hist(x, plot = FALSE, breaks=breaks)
  H$density <- with(H, 100 * density* diff(breaks)[1])
  labs <- ifelse(round(H$density)>0,paste(round(H$density), "%", sep=""),NA)
  plot(H, freq = FALSE, labels = labs, ylim=c(0, 1.08*max(H$density)),...)
}


# FUNCITON TO DISPLAY RELATIVE PERCENTAGE VALUES GREATER THAN 5, CAN BE HARDCODED
histP2 <- function(x,breaks, ...) {
  H <- hist(x, plot = FALSE, breaks=breaks)
  H$density <- with(H, 100 * density* diff(breaks)[1])
  labs <- ifelse(round(H$density)>5,paste(round(H$density), "%", sep=""),NA)
  plot(H, freq = FALSE, labels = labs, ylim=c(0, 1.08*max(H$density)),...)
}


# FUNCTION THAT TRIMS LEADING WHITESPACE
trim.leading <- function (x)  sub("^\\s+", "", x)


# FUNCTION THAT TRIMS TRAILING WHITESPACE
trim.trailing <- function (x) sub("\\s+$", "", x)


# FUNCTION THAT TRIMS LEADING OR TRAILING WHITESPACE
trim <- function (x) gsub("^\\s+|\\s+$", "", x)


# FUNCITON TO REMOVE ALL SPACES FROM LEVEL NAMES OF A VARIABLE
rm_space <- function(df,col_name){
  level_names <- unique(levels(df[,which(names(df) %in% col_name)]))
  df[,which(names(df) %in% col_name)] <- mapvalues(df[,which(names(df) %in% col_name)], 
                                                   from = level_names,
                                                   to = gsub("[[:space:]]","",level_names))
  return(df)
}

# FUNCTION THAT DROPS ISOLATED VERTICES
drop_isolated <- function(graph, vertex_colors, vertex_names) {
  
  # get the definitions
  g <- graph
  
  # filter to degree > 0 eliminate isolated vertices
  g_f <- delete.vertices(g,V(g)[degree(g)==0])
  v_g_f <- setdiff(V(g),V(g)[degree(g)==0])
  
  # filter names and color
  V(g_f)$name <- vertex_names[v_g_f]
  V(g_f)$color <- vertex_colors[v_g_f]
  return(g_f)
}


# FUNCITON THAT DROPS LOOPS
drop_loops <- function(graph, vertex_colors, vertex_names){
  g <- simplify(graph,remove.loops=TRUE)
  g_f <- delete.vertices(g,V(g)[degree(g)==0])
  v_g_f <- setdiff(V(g),V(g)[degree(g)==0])
  # filter names and color
  V(g_f)$name <- vertex_names[v_g_f]
  V(g_f)$color <- vertex_colors[v_g_f]
  return(g_f)
}


# DEFINE EDGE-FILTRATION FUNCTION FOR THE NETWORKS
filter <- function(cutoff,edge_matrix,vertex_colors,vertex_names) {
  
  # get the definitions
  cut <- cutoff
  adj <- edge_matrix
  adj[adj<cut] <- 0
  adj_0 <- adj
  
  # define the filtered graph
  g <- graph.adjacency(adj_0,mode="directed",weighted=TRUE)
  V(g)$color <- vertex_colors
  
  # filter to degree > 0 eliminate isolated vertices
  g_f <- delete.vertices(g,V(g)[degree(g)==0])
  v_g_f <- setdiff(V(g),V(g)[degree(g)==0])
  V(g_f)$name <- vertex_names[v_g_f]
  V(g_f)$color <- vertex_colors[v_g_f]
  
  return(g_f)
}


# DEFINE DEGREE FILTRATION FUNCTION FOR THE NETWORKS
filter_deg <- function(cutoff,edge_matrix,vertex_colors,vertex_names) {
  
  # get the definitions
  cut <- cutoff
  adj <- edge_matrix
  
  # set the cut-off
  adj_0 <- adj
  adj_0[adj_0>0] <- 1
  for (i in 1:dim(adj)[1]){
    if (sum(adj_0[,i])<cutoff){
      adj <- adj[,-i]
      adj <- adj[-i,]
    }
  }
  
  # define the filtered graph
  g <- graph.adjacency(adj,mode="directed",weighted=TRUE)
  V(g)$color <- vertex_color
  
  # filter to degree > 0 eliminate isolated vertices
  g_f <- delete.vertices(g,V(g)[degree(g)==0])
  v_g_f <- setdiff(V(g),V(g)[degree(g)==0])
  V(g_f)$name <- vertex_names[v_g_f]
  
  # color the filtered graph with sources and endpoints for the directed edges
  V(g_f)$color <- V(g)$colors[v_g_f]
  
  return(g_f)
}

# DEFINE WEIGHTED DEGREE FILTRATION USING graph.strength

# DEFINE A FILTRATION OF GRAPH TO DISPLAY LARGEST CLUSTER (GIANT COMPONENT)
giant_comp <- function(graph, vertex_colors, vertex_names){
  
  # get the definitions
  g <- graph
  
  # identify the largest cluster
  clusters <- as.data.frame(table(clusters(g)$membership))
  ind <- as.numeric(clusters[which(clusters$Freq==max(clusters$Freq)),]$Var1)
  vertices <- which(clusters(g)$membership==ind)
  vertices_complement <- which(clusters(g)$membership!=ind)
  
  # filter to only include the giant component
  g_f <- delete.vertices(g,V(g)[which(V(g) %in% vertices_complement)])
  v_g_f <- setdiff(V(g),V(g)[which(V(g) %in% vertices_complement)])
  V(g_f)$name <- vertex_names[v_g_f]
  
  # color the filtered graph with sources and endpoints for the directed edges
  V(g_f)$color <- vertex_colors[v_g_f]
  
  return(g_f)
}


# INITIAL DATA INPUT AND TRANSFORMATIONS
#
#
# COLUMN NAMES FOR CCCM_Nepal_DTM_R2.csv
#
#
# 1.1.c.1 Site ID (SSID)
# 1.1.d.1 Site Name
# 1.1.a.2 Survey Round
# 1.1.a.1 Survey date (DD.MM.YYYY)
# 1.1.e.2 Zone
# 1.1.e.3 District
# 1.1.e.4 VDC
# 1.1.f.2 Site GPS Latitude
# 1.1.f.1 Site GPS Longitude
# 1.4.a.2 Site open?  
# 1.4.c.1 Closing Date (DD.MM.YYYY)  
# 1.3.a.1 Site Classification  
# 1.3.b.1 Site Type  
# If other, please specify  
# 1.4.a.1 Site start/open date (DD.MM.YYYY)  
# 1.4.b.1 Site Expected Closing Date  
# 1.1.i.1 Accessibility to site  
# 1.3.d.1 Ownership of land of site  
# 1.3.c.1 What is the most common type of shelter  
# If other or No Answer, please specify  
# 1.2.b.1 Is there a Site Management Committee (SMC) at the site?  
# 1.2.b.10 % of women participating in the Site Management Committee (SMC)  
# 1.2.b.2 Is the site management committee (SMC) made up from the community at the site?  
# 1.2.b.7 SMC member name or focal point at site  
# 1.2.b.8 SMC member phone number  
# 1.2.c.1 Is there a Site Management Agency (SMA) at the site?  
# 1.2.c.2 Type of SMA  
# If other, Specify.  
# 1.2.c.3 Name of SMA  
# 1.2.c.4 SMA member name or focal point at site  
# 1.2.c.5 SMA member phone number  
# 1.2.a.1 Is there any registration activity/IDP List Maintained  
# 1.2.n.1 Is WASH support being provided at the site ?  
# If yes who provides the service?  
# 1.2.o.1 Is HEALTH support being provided at the site  
# If yes who provides the service?  
# 1.2.p.1 Is SHELTER/NFI support provided at the site ?  
# If yes who provides the service?  
# 1.2.q.1 Is FOOD support provided at the site ?  
# If yes who provides the service?  
# 1.2.r.1 Is PROTECTION support provided at the site ?  
# If yes who provides the service?  
# 1.2.s.1 Is EDUCATION support provided at the site ?  
# If yes who provides the service?  
# 1.2.t.1 Is LIVELIHOOD support being provided at the site?  
# If yes who provides the service?  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (Zone)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (District)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (VDC)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (Ward)  
# 1.5.c.2 Place of Origin of the second largest IDP group (Zone)  
# 1.5.c.2 Place of Origin of the second largest IDP group District)  
# 1.5.c.2 Place of Origin of the second largest IDP group (VDC)  
# 1.5.c.2 Place of Origin of the second largest IDP group (Ward)  
# 2.1.a.1 Total Number of IDP Families/HHs  
# 2.1.c.1 Number of Males by age: <1 year  
# 2.1.c.1 Number of Males by age: 1-5 year  
# 2.1.c.1 Number of Males by age: 6-17 year  
# 2.1.c.1 Number of Males by age: 18-59 year  
# 2.1.c.1 Number of Males by age: 60+ year  
# 2.1.b.2 Total number of IDP Male Individuals  
# 2.1.d.1 Number of Females by age: <1 year  
# 2.1.d.1 Number of Females by age: 1-5 year  
# 2.1.d.1 Number of Females by age: 6-17 year  
# 2.1.d.1 Number of Females by age: 18-59 year  
# 2.1.d.1 Number of Females by age: 60+ year  
# 2.1.b.2 Total number of IDP Female Individuals  
# 2.1.b.2 Total number of IDP  
# 2.2.c.1 Number of pregnant women  
# 2.2.d.1 Number of breastfeeding mothers  
# 2.2.g.1 Number of persons with Disabilities  
# 2.2.f.1 Number of Persons with Chronic Diseases or Serious Medical Conditions  
# 2.2.n.1 Number of single female-headed households  
# 2.2.p.1 Number of child-headed households  
# 2.2.x.3 Number of elderly-headed households  
# 2.2.u.1 Number of members of marginalized caste/ethnicity  
# 2.3.c.1 Date of arrival of first IDP group  
# 2.3.c.2 Date of arrival of last IDP group  
# 2.3.e.1 Area of intended return for largest IDP group  
# 2.3.b.4 Have IDPs previously been displaced  
# 2.3.e.7 What is preventing the largest IDP group from returning home?  
# If other, please specify  
# 2.3.g.1 Estimated % of IDPs sleeping in the site  
# 2.3.f.1 Is there relocation plan for the IDPs?  
# 3.1.a.1 Percentage of HH living outside (no shelter)  
# 3.1.b.1 Percentage of HH living in tents  
# 3.1.c.1 Percentage of HH living in makeshift shelters  
# 3.1.d.1 Percentage of HH living indoors (solid walls)  
# 3.2.a.1 Percentage of HH with access to electricity  
# 3.2.b.1 Percentage of HH with access to safe cooking facilities  
# 3.2.c.1 Percentage of HH with private living area  
# 3.7.c.2 Percentage of HH with mosquito nets  
# 3.7.j.1 Most needed type of NFI  
# If other or No Answer, please specify  
# 3.7.j.2 Second most needed type of NFI  
# If other or No Answer, please specify  
# 3.7.j.3 Third most needed type of NFI  
# If other or No Answer, please specify  
# 3.8.b.1 Is there a need for shelter repair materials  
# 3.8.b.1 Is there a need for tools for shelter construction?  
# 4.1.a.1 Location of site's main water source (walking, one-way)  
# 4.1.b.1 Main non-drinking water source provided or available  
# If other, please specify  
# 4.1.f.1 What is the main drinking water source provided or available  
# If other, please specify  
# 4.2.a.1 Average amount of water available per day and per person  
# 4.3.e.1 What is the main problem with the water?  
# If other, please specify  
# 4.4.j.1 Condition of most of the latrines  
# 4.4.a.1 Number of functioning toilets available on-site  
# 4.8.b.1 Do toilets and bathrooms have locks from the inside  
# 4.1.g.1 Is the drinking water potable ?  
# 4.4.b.1 Availability of separate male and female toilets  
# 4.5.b.1 Availability of separate male and female bathing areas  
# 4.6.a.1 Garbage Disposal Type  
# If other, please specify  
# 4.4.n.1 Evidence of open defecation?  
# 4.7.g.1 Availability of hand-washing station filled in with water and soap close to the toilets?  
# 4.7.g.2 Evidence of hand-washing practices?  
# 5.1.a.1 Is there access to food (distribution, vouchers, trade, fishing…)  
# 5.1.e.1 Is there access to a market near from the site?  
# 5.1.d.1 Most common source for obtaining food  
# If other, please specify  
# 6.1.a.1 Screening for malnutrition conducted in the area? (i.e. weight, height, mid-upper arm circumference screening)  
# 6.1.b.1 Availability of supplementary feeding for pregnant and lactating mothers  
# 6.1.c.1 Availability of supplementary feeding for children  
# 7.1.b.1 What is the most prevalent health problem at the site  
# If other, please specify  
# 7.1.b.2 What is the second most prevalent health problem at the site  
# If other, please specify  
# 7.1.b.3 What is the third most prevalent health problem at the site  
# If other, please specify  
# 7.2.a.1 Access to health facilities  
# 7.2.a.4 Do most women utilize health facilities?  
# 7.2.b.1 Location of health facilities/services  
# 7.2.c.2 Who provides health facilities/services  
# If other, please specify  
# 7.2.a.3 Regular access to medicine  
# 8.1.b.1 Access to formal/informal education services for children from displaced HHs  
# 8.2.b.1 Distance to nearest education facility  
# 8.3.a.2 Of children in site attending school, what % are girls?  
# 8.3.a.2 Of children in site attending school, what % are boys?  
# 8.2.a.1 Location of formal/informal education facilities/services for children from displaced HHs  
# 9.1.a.1 Occupation/trade of majority of displaced households (coping mechanism)  
# If other, please specify  
# 9.2.h.1 Percentage of HH in the site with a source of income  
# 9.2.i.1 Access to income generating activities  
# 9.2.k.1 Do the majority of HHs in the site receive remittances?  
# 9.3.a.2 Is there livestock on site  
# 9.1.c.1 Do IDPs have access to land for cultivation?  
# 11.3.b.1 Do any recruiters come to the site for day labour, domestic work or working abroad?  
# 11.3.a.2 if yes, to which city/country  
# 10.1.a.2 Is there security on-site/settlement areas  
# 10.1.e.1 Are security incidents reported in the site  
# 10.1.b.1 Who provides main security in the site  
# If other, please specify  
# 10.2.g.1 Most common type of security incidents reported/known occurring in the settlement area  
# If others, please specify  
# 10.2.g.5 Are security incidents commonly reported in the community before the earthquake?  
# 10.2.i.4 Most reported problem in receiving support  
# If other, please specify  
# 10.1.c.1 Number of children friendly spaces  
# 10.1.d.1 Number of women friendly spaces  
# 10.1.f.1 Do the majority of people have identification card or other documentation?  
# 10.2.j.2 Reporting/referral mechanism for GBV survivors  
# 10.2.t.2 Are women segregated during menstruation?  
# If yes, where do they go?  
# 10.2.t.1 Are there issues of discrimination towards minority groups?  
# 10.3.a.1 Do men feel safe in site?  
# 10.3.a.2 Do children feel safe in site  
# 10.3.a.3 Do women feel safe at the site  
# 10.1.s.1 Is there adequate lighting in the majority of communal point (WASH facilities, public spaces…)  
# 11.1.a.1 Where do residents mostly get their information from?  
# If other, please specify  
# 11.2.c.1 Are complaints being reported?  
# 11.2.c.2 If yes, to whom?  
# 11.1.c.1 What is the main topic on which the community is requesting information on?  
# If other, please specify  
# 11.1.h.1 Is everyone aware that donations do not need to be exchanged for anything?  
# Site classification


# LOAD LAT/LON COORDINATES (OF CENTROIDS) AND HLCIT CODES
hlcit <- read.csv(paste0(DIR,"master_hlcit.csv"))
colnames(hlcit) <- c("lon","lat","vdc_name","vname","hlcit_code")
hlcit$hlcit_code <- as.factor(hlcit$hlcit_code)
hlcit$vname <- as.character(hlcit$vname)
hlcit$vdc_name <- as.character(hlcit$vdc_name)
hlcit <- rm_space(hlcit,"hlcit_code")
hlcit$hlcit_code <- as.numeric(levels(hlcit$hlcit_code))[hlcit$hlcit_code]


# READ IN THE DISPLACEMENT FILE DATA
dt_data <- read.csv(paste0(DIR,"CCCM_Nepal_DTM_R2.csv"), sep = ",")


# SET VAIRABLE NAMES THAT ARE SHORTER AND DO NOT CONTAIN SPACES AND OTHER SYMBOLS
nam <- c("ssid","site_name","survey_round","survey_date","zone","district","vdc","lat","lon","is_open","closing_date",
       "site_class","site_type","if_other","site_start","expect_close","access","owner","shelter_type",
       "specify","is_smc","women_smc","smc_locals","smc_members","member_phone","is_sma","sma_type",
       "if_other2","sma_name","sma_member","sma_phone","is_idp","is_wash","wash_provider","is_health",
       "health_provider","is_nfi","nfi_provider","is_food","food_provider","is_protect","protector",
       "is_educate","educator","is_livelihood","livelihood_provider","idp_origin_zone","idp_origin_district",
       "idp_origin_vdc","idp_origin_ward","idp2_origin_zone","idp2_origin_district","idp2_origin_vdc",
       "idp2_origin_ward","idp_hh","males_age_1","males_age_1-5","males_age_6-17","males_age_18-59",
       "males_age_over60","total_idp_males","females_age_1","females_age_1-5","females_age_6-17",
       "females_age_18-59","females_age_over60","total_idp_females","total_idp","preg","breastfeed",
       "disabled","chronical","female_hh","child_hh","elder_hh","marginal","idp_arrival","idp_last_arrival",
       "return_area","prev_idp","why_idp","if_other3","idp_pct_sleep","is_relocation","hh_pct_out","hh_pct_tents",
       "hh_pct_mkshift","hh_pct_indoors","hh_pct_electricity","hh_pct_cooking","hh_pct_private","hh_pct_nets",
       "nfi_most_need","if_other4","nfi2_most_need","if_other5","nfi3_most_need","if_other6","is_repairs",
       "is_tools","water_loc","non_drinking_water","if_other7","drinking_water","if_other8","ave_water_per_day",
       "water_prob","if_other9","latrines","no_wc","wc_locks","water_potable","m_f_wc","m_f_bath","garbage",
       "if_other10","open_wc","hand_wash","is_hand_wash","food_access","market_access","food_src","if_other11",
       "screening","food_for_preg","food_child","illness","if_other12","illness2","if_other13","illness3",
       "if_other14","access_hosp","women_use_hosp","hosp_loc","who_doc","if_other15","access_meds","access_school",
       "dist_school","girls_pct_school","boys_pct_school","school_loc","coping_mech","if_other16","hh_pct_income",
       "paid_work","is_remittance","is_livestock","is_land","recruit","to_which","is_security","reported",
       "main_security","if_other17","common_crimes","if_other18","before_reported","biggest_prob","if_other19",
       "child_friendly","women_friendly","id","gbv_survivors","w_segregated","where","discrimination","men_safe",
       "children_safe","women_safe","lights","info","if_other20","complaints","to_whom","info_need","if_other21",
       "donations","site_class2")
colnames(dt_data) <- nam


# DROP THE FIRST THREE ROWS DUE TO EXTRA NOTATION
dt_data <- dt_data[4:dim(dt_data)[1],]


# FORMAT THE POPULATION COUNT (IN HH) OF DISPLACEMENT
dt_data$idp_hh <- as.numeric(levels(dt_data$idp_hh))[dt_data$idp_hh]


# COLUMNS FOR THE INITIAL MODEL:
# 1.1.c.1 Site ID (SSID)
# 1.1.d.1 Site Name
# 1.1.a.2 Survey Round
# 1.1.a.1 Survey date (DD.MM.YYYY)
# 1.1.e.2 Zone
# 1.1.e.3 District
# 1.1.e.4 VDC
# 1.1.f.2 Site GPS Latitude
# 1.1.f.1 Site GPS Longitude
# 1.5.b.2 Place of Origin of the largest IDP group in camp (Zone)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (District)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (VDC)  
# 1.5.b.2 Place of Origin of the largest IDP group in camp (Ward)  
# 1.5.c.2 Place of Origin of the second largest IDP group (Zone)  
# 1.5.c.2 Place of Origin of the second largest IDP group District)  
# 1.5.c.2 Place of Origin of the second largest IDP group (VDC)  
# 1.5.c.2 Place of Origin of the second largest IDP group (Ward)  
# 2.1.a.1 Total Number of IDP Families/HHs 


# SELECT INITIAL COLUMNS, SAVE THE FILE
names <- c("ssid","site_name","survey_round","survey_date","zone","district",
         "vdc","lat","lon","idp_origin_zone","idp_origin_district","idp_origin_vdc",
         "idp_origin_ward","idp2_origin_zone","idp2_origin_district","idp2_origin_vdc",
         "idp2_origin_ward","idp_hh")
dt_data <- dt_data[,names]
write.csv(dt_data,file = paste0(DIR,"Nepal_DTM_Network_model1.csv"))


# READ IN DISPLACEMENT DATA
dt_data <- read.csv(paste0(DIR,"Nepal_DTM_Network_model1.csv"))
dt_data <- dt_data[,2:dim(dt_data)[2]]


# FORMAT THE VDC RELATED COLUMNS AS CHARACTER
dt_data$vdc <- trim(as.character(dt_data$vdc))
dt_data$idp_origin_vdc <- trim(as.character(dt_data$idp_origin_vdc))
dt_data$idp2_origin_vdc <- trim(as.character(dt_data$idp2_origin_vdc))


# FILTER TO USE DESTINATION VDC LEVELS AS WELL AS ORIGIN (1 OR 2) VDC ENTRIES THAT ARE NON-EMPTY
dt_data <- dt_data[nchar(dt_data$vdc)>0 & (nchar(dt_data$idp_origin_vdc) + nchar(dt_data$idp2_origin_vdc))>0,]


# RESOLVE VDC NAMES ACCORDING TO THE HLCIT MASTER LIST:
dt_data$vdc <- mapvalues(dt_data$vdc,
                         from = c("Barahbise","Barpak","Chandeni","Charikot","Gokarneshwar","Kathmandu",
                                  "Kathmandu Municipality","Lalitpur","Mankha","Manmaijn","Naikap Naya",
                                  "Puranagaun","Sanga","Sangkhu Suntol","Tokhasaraswati"),
                         to = c("Barhabise","Warpak","Chandeni Mandan","Narikot","Gokarneswor",
                                "Kathmandu Metropolitan","Kathmandu Metropolitan","Lalitpur Sub Metropolitan",
                                "Mangkha","Manmaiju","NaikapNayaBhanjyang","PuranogaunDapcha",
                                "Sangla","Sangkhu","TokhaSarswoti"))
dt_data$idp_origin_vdc <- mapvalues(dt_data$idp_origin_vdc,
                                    from = c("barabise","Bhimeshwor Municipality","Chautara Municipality","Maanka"),
                                    to = c("Barhabise","Bhimeswor Municipality","Chautara","Mahangkal"))
dt_data$idp2_origin_vdc <- mapvalues(dt_data$idp2_origin_vdc,
                                     from = c("barabise","Bhimeshwor Municipality","Jhaku"),
                                     to = c("Barhabise","Bhimeswor Municipality","Jhyanku"))


# CREATE A LIST OF UNIQUE VDC DESTINATION NAMES
vdc <- unique(c(dt_data$vdc, dt_data$idp_origin_vdc,dt_data$idp2_origin_vdc))
vdc <- vdc[-which(vdc == "")]
vdc1 <- intersect(vdc,hlcit$vdc_name)
vdc2 <- setdiff(vdc,hlcit$vdc_name)


# RESOLVE THE ONLY VDC NAME LEFT THAT IS NOT ON RECORD
index <- which(dt_data$vdc == setdiff(vdc2,hlcit$vname))
closest <- vector()
for (k in 1:dim(hlcit)[1]){
  closest[k] <- (dt_data$lat[index]-hlcit$lat[k])^2+(dt_data$lon[index]-hlcit$lon[k])^2
}
dt_data$vdc[index] <- hlcit$vname[which(closest == min(closest))]
vdc <- unique(c(dt_data$vdc, dt_data$idp_origin_vdc,dt_data$idp2_origin_vdc))
vdc <- vdc[-which(vdc == "")]
vdc1 <- intersect(vdc,hlcit$vdc_name)
vdc2 <- setdiff(vdc,hlcit$vdc_name)


# EXTRAPOLATE LHCIT NUMBERS FROM LAT AND LON FOR VDCS FROM HLCIT_MASTER
hl <- vector()
xc <- vector()
yc <- vector()
for (k in 1:length(vdc)){
  if (vdc[k] %in% vdc1){
    hl[k] <- hlcit$hlcit_code[which(hlcit$vdc_name == vdc[k])[1]]
    xc[k] <- hlcit$lat[which(hlcit$vdc_name == vdc[k])[1]]
    yc[k] <- hlcit$lon[which(hlcit$vdc_name == vdc[k])[1]]
  }
  if (vdc[k] %in% vdc2){
    hl[k] <- hlcit$hlcit_code[which(hlcit$vname == vdc[k])[1]]
    xc[k] <- hlcit$lat[which(hlcit$vname == vdc[k])[1]]
    yc[k] <- hlcit$lon[which(hlcit$vname == vdc[k])[1]]
  }
}
koords <- cbind(xc,yc)


# FURTHER NARROW DOWN COLUMNS
dt_data <- dt_data[,c("vdc","idp_origin_vdc","idp2_origin_vdc","idp_hh")]


# VDC-LEVEL ADJACENCY MATRIX FOR THE DISPLACEMENT GRAPH, AT THE LEVEL OF DISPLACEMENT TRACK
vdc_m <- matrix(0,nrow = length(vdc),ncol = length(vdc))


# CREATE A MATRIX THAT TRACKS THE NUMBERS OF DISPLACED POPULATIONS AS WELL 
# (THIS COULD BE USED TO DERIVE EDGE WEIGHTS)
dtm <- matrix(nrow = length(vdc),ncol = length(vdc))


# COMPUTE THE ENTRIES OF BOTH THE TRACKING DISPLACEMENT MATRIX 
# AND THE WEIGHTED DISPLACEMENT TRACKING MATRIX
for (i in 1:length(vdc)){
  for (j in 1:length(vdc)){
    vdc_m[[i,j]] <- length(dt_data[dt_data$idp_origin_vdc == vdc[i] & dt_data$vdc == vdc[j],1])+
      length(dt_data[dt_data$idp2_origin_vdc == vdc[i] & dt_data$vdc == vdc[j],1])
    dtm[[i,j]] <- (2/3)*sum(dt_data[dt_data$idp_origin_vdc == vdc[i] & dt_data$vdc == vdc[j],]$idp_hh)+
      (1/3)*sum(dt_data[dt_data$idp2_origin_vdc == vdc[i] & dt_data$vdc == vdc[j],]$idp_hh)
  }
}


# BUILD THE DIRECTED WEIGHTED VDC NETWORK
gv <- graph.adjacency(vdc_m,
                      mode = "directed",
                      weighted = TRUE)


# COLOR VDC NAMES OF ORIGIN (GREEN) AND VDC NAMES OF DESTINATION (BLUE)
V(gv)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gv)$color[k] <- "green"
  } 
}


# SET THE VERTEX LABELS
V(gv)$name <- vdc


# DROP ISOLATED VERTICES (NO DISPLACEMENT)
gv <- drop_isolated(graph = gv,
                    vertex_colors = V(gv)$color,
                    vertex_names= vdc)
plot(gv,
     layout = layout.fruchterman.reingold(gv,
                                          niter = 200,
                                          area = 2000*vcount(gv)),
     vertex.color = V(gv)$color,
     vertex.size = 9, 
     vertex.label = V(gv)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*(E(gv)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Abstract Nepal Displacement Network Flow (VDC Level, with Self-Loops)")
legend("topright",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# DROP LOOPS ONLY VERTICES AS WELL ()
gv <- drop_loops(graph = gv,
                 vertex_colors = V(gv)$color,
                 vertex_names = vdc)

# RESULTING CLEANED UP GRAPH SHOWING NONTRIVIAL MIGRATION
plot(gv,
     layout = layout.fruchterman.reingold(gv,
                                          niter = 200,
                                          area = 2000*vcount(gv)),
     vertex.color = V(gv)$color,
     vertex.size = 9, 
     vertex.label = V(gv)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 2*(E(gv)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Abstract Nepal Displacement Network Flow (VDC Level, No Self-Loops)")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# PLOT THE GRAPH WITH SELECTED COORDINATES
gv_coords <- koords[which(vdc %in% V(gv)$name),]
plot(gv,
     layout = gv_coords,
     vertex.color = V(gv)$color,
     vertex.size = 4, 
     vertex.label = V(gv)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.7, 
     edge.width = (E(gv)$weight),
     edge.arrow.size = 0.4,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# DEFINE THE WEIGHTED DISPLACEMENT GRAPH
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)

# SET VERTEX COLORS
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}


# SET THE VERTEX LABELS
V(gd)$name <- vdc


# PLOT THE WEIGHTED DISPLACEMENT GRAPH
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)


# DROP LOOPS ONLY VERTICES AS WELL 
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)


# RESULTING CLEANED UP GRAPH SHOWING NONTRIVIAL MIGRATION
plot(gd, 
     layout = layout.fruchterman.reingold(gd,
                                          niter = 200,
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9, 
     vertex.label = V(gd)$name,
     vertex.label.color = "black", 
     vertex.label.font = 2, 
     vertex.label.cex = 0.7, 
     edge.width = 0.02*(E(gd)$weight),
     edge.arrow.size = 0.7,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Abstract Nepal Displacement Network Flow (VDC Level)")
legend("topright",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# SELECT THE COORDINATES
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4, 
     vertex.label = V(gd)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.6, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.2,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# DISPLAY THE LARGEST CLUSTER (GIANT COMPONENT):
gd_c <- giant_comp(graph = gd,
                   vertex_colors = V(gd)$color,
                   vertex_names = V(gd)$name)


# PLOT THE WEIGHTED DISPLACEMENT GRAPH
gd_c_coords <- koords[which(V(gd)$name %in% V(gd_c)$name),]
plot(gd_c,
     layout = gd_c_coords,
     vertex.color = V(gd_c)$color,
     vertex.size = 5, 
     vertex.label = V(gd_c)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.7, 
     edge.width = 0.2*sqrt(E(gd_c)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("top",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

plot(gd_c,
     layout = layout.fruchterman.reingold(gd_c,
                                          niter = 200,
                                          area = 2000*vcount(gd_c)),
     vertex.color = V(gd_c)$color,
     vertex.size = 7, 
     vertex.label = V(gd_c)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 1, 
     edge.width = 0.2*sqrt(E(gd_c)$weight),
     edge.arrow.size = 0.7,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("top",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# EDGE-FILTRATION BY EDGE WEIGHT OF THE WEIGHTED DISPLACEMENT GRAPH: CUT-OFF = 50% quantile
cut50 <- quantile(as.vector(dtm[dtm>0]),0.50)
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$name <- vdc
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))
    + length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
gd_f <- filter(cutoff = cut50,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
gd_f_coords <- koords[which(vdc %in% V(gd_f)$name),]
plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 5, 
     vertex.label = V(gd_f)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.7, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Filtered Abstract Nepal Displacement Network (Cut-Off = 50%))")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 10,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 1.2, 
     edge.width = 0.4*sqrt(E(gd_f)$weight),
     edge.arrow.size = 01,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Filtered Nepal Displacement Geo-Network (Cut-Off = 50%))")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# EDGE-FILTRATION BY EDGE WEIGHT OF THE WEIGHTED DISPLACEMENT GRAPH: CUT-OFF = 75% quantile
cut75 <- quantile(as.vector(dtm[dtm>0]),0.75)
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$name <- vdc
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))
  + length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
gd_f <- filter(cutoff = cut75,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
gd_f_coords <- koords[which(vdc %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 14,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 1.4, 
     edge.width = 0.05*(E(gd_f)$weight),
     edge.arrow.size = 1.4,
     edge.curved = TRUE,edge.color = gray.colors(1),
     main = "Filtered Abstract Nepal Displacement Network (Cut-Off = 75%))")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 14,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 1.4, 
     edge.width = 0.05*(E(gd_f)$weight),
     edge.arrow.size = 1.4,
     edge.curved = TRUE,edge.color = gray.colors(1),
     main = "Filtered Nepal Displacement Geo-Network (Cut-Off = 75%))")
legend("topright",
       c("Origins of Displacement","Destinations of Displacement"),
       fill = c("green","SkyBlue2"),
       bty = "n")

#
#
# ANALYSIS OF THE VDC NETWORK ITSELF
#
#

# DEFINE THE WEIGHTED DISPLACEMENT GRAPH
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)


# SET VERTEX COLORS
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}


# SET THE VERTEX LABELS
V(gd)$name <- vdc


# PLOT THE WEIGHTED DISPLACEMENT GRAPH
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)


# DROP LOOPS ONLY VERTICES AS WELL 
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)

# SELECT THE COORDINATES
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4, 
     vertex.label = V(gd)$name,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.5, 
     edge.width = 0.15*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topright",
       c("Displacement Origin","Displacement Destination"),
       fill = c("green","SkyBlue2"),
       bty = "n")

# REGENERATE THE GRAPH AGAIN
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)


# SET VERTEX COLORS
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}


# SET THE VERTEX LABELS
V(gd)$name <- vdc

# THIS IS THE NUMBER OF EDGES FROM EACH NODE
# IGNORING DIRECTION (TOTAL DEGREE)
summary(degree(gd))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.888   2.000  14.000
# THIS IS THE IN DEGREE SUMMARY
summary(degree(gd,mode = "in"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.9438  1.0000  4.0000
# THIS IS THE OUT DEGREE SUMMARY
summary(degree(gd,mode = "out"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.9438  1.0000 10.0000
# THIS IS THE WEIGHTED NUMBER OF THE ABOVE vdcENCIES, SO THE NUMBER OF SHARED VDC
# TARGETS IS ACCOUNTED FOR BETWEEN EACH PAIR OF vdcENCIES
summary(graph.strength(gd))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    1.333   33.330   93.330  206.700  193.300 3529.000
# AGAIN, THE INWARD WEIGHTED DEGREE
summary(graph.strength(gd,mode = "in"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   33.33  103.40  108.30 1766.00
# AGAIN, THE OUTWARD WEIGHTED DEGREE
summary(graph.strength(gd,mode = "out"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   32.67  103.40   93.33 1764.00
# PLOT THE NUMBER OF DISTINCT VDC-VDC CONNECTIONS
plot(sort(degree(gd)),
     col = adjustcolor(rgb(1,0,1,1)),
     pch = 19,
     xaxt = "n",
     yaxt = "n",
     ann = FALSE)
par(new=T)
plot(sort(degree(gd,mode = "in")),
     col = "green",
     pch = 19,
     xaxt = "n",
     yaxt = "n",
     ann = FALSE)
par(new=T)
plot(sort(degree(gd,mode = "out")),
     col = "blue",
     pch = 19,
     xlab = "VDC Index",
     ylab = "Number of VDC Transitions",
     main = "Number of VDC Transitions To and From a Given VDC (Sorted)")
legend("topleft",
       c("Total VDC-VDC Transitions","VDC In-Transitions", "VDC Out-Transitions"),
       fill = c(adjustcolor(rgb(1,0,1,1)),"green","blue"),
       bty = "n")

# THE DEGREE DISTRIBUTION (VDC-VDC CONNECTIONS)
deg1 <- hist(degree(gd), breaks = 20)$counts

deg1n <- 100*deg1/sum(deg1)
deg2 <- hist(degree(gd,mode = "in"), breaks = 20)$counts

deg2n <- 100*deg2/sum(deg2)
deg3 <- hist(degree(gd,mode = "out"), breaks = 20)$counts

deg3n <- 100*deg3/sum(deg3)
maxn <- max(length(deg1n), length(deg2n),length(deg3n))
d1 <- append(deg1n,rep(0,maxn-length(deg1n)))
d2 <- append(deg2n,rep(0,maxn-length(deg2n)))
d3 <- append(deg3n,rep(0,maxn-length(deg3n)))
ddata <- cbind(d1,d2,d3)
barplot(t(ddata), 
        beside = T, 
        xlab = "Number of VDC-VDC Connection", 
        ylab = "Relative VDC-VDC Conneciton %", 
        main = "Distribution of VDC Transitions To and From a Given VDC",
        col = c(adjustcolor(rgb(1,0,1,1)),"green","blue"))
axis(1, 
     at = 4*(1:maxn)-2,
     labels = as.character(1:maxn),
     cex.axis = 1,
     las = 1)
legend("topright",
       legend = c("Total VDC-VDC Transitions","VDC In-Transitions", "VDC Out-Transitions"),
       col = c(adjustcolor(rgb(1,0,1,1)),"green","blue"), 
       bty = "n",pch = 15, 
       cex = 1.5)

# PLOT THE WEIGHTED NUMBER OF DISTINCT VDC-VDC CONNECTIONS
plot(sort(graph.strength(gd)),
     col = adjustcolor(rgb(1,0,1,1)),
     pch = 19,
     xaxt = "n",
     yaxt = "n",
     ann = FALSE)
par(new=T)
plot(sort(graph.strength(gd,mode = "in")),
     col = "green",
     pch = 19,
     xaxt = "n",
     yaxt = "n",
     ann = FALSE)
par(new=T)
plot(sort(graph.strength(gd,mode = "out")),
     col = "blue",
     pch = 19,
     xlab = "VDC Index",
     ylab = "Weighted Number of VDC Transitions",
     main = "Weighted Number of VDC Transitions To and From a Given VDC (Sorted)")
legend("topleft",
       c("Total Weighted VDC-VDC Transitions","Weighted VDC In-Transitions", "Weighted VDC Out-Transitions"),
       fill = c(adjustcolor(rgb(1,0,1,1)),"green","blue"),
       bty = "n")

# THE WEIGHTED DEGREE DISTRIBUTION (VDC-VDC CONNECTIONS)
deg1 <- hist(graph.strength(gd), breaks = 20)$counts

deg1n <- 100*deg1/sum(deg1)
deg2 <- hist(graph.strength(gd,mode = "in"), breaks = 20)$counts

deg2n <- 100*deg2/sum(deg2)
deg3 <- hist(graph.strength(gd,mode = "out"), breaks = 20)$counts

deg3n <- 100*deg3/sum(deg3)
maxn <- max(length(deg1n), length(deg2n),length(deg3n))
d1 <- append(deg1n,rep(0,maxn-length(deg1n)))
d2 <- append(deg2n,rep(0,maxn-length(deg2n)))
d3 <- append(deg3n,rep(0,maxn-length(deg3n)))
wddata <- cbind(d1,d2,d3)
barplot(t(wddata), 
        beside = T, 
        xlab = "Weighted Number of VDC-VDC Connection", 
        ylab = "Weighted Relative VDC-VDC Conneciton %", 
        main = "Distribution of Weighted VDC Transitions To and From a Given VDC",
        col = c(adjustcolor(rgb(1,0,1,1)),"green","blue"))
axis(1, 
     at = 4*(1:maxn)-2,
     labels = as.character(1:maxn),
     cex.axis = 1,
     las = 1)
legend("topright",
       legend = c("Total Weighted VDC-VDC Transitions","Weighted VDC In-Transitions", "Weighted VDC Out-Transitions"),
       col = c(adjustcolor(rgb(1,0,1,1)),"green","blue"), 
       bty = "n",pch = 15, 
       cex = 1.5)

# GRAPH DENSITY IS THE RATIO OF THE NUMBER OF EDGES AND THE NUMBER OF POSSIBLE EDGES
# TYPICALLY ON THE ORDER OF 1-10%
100*graph.density(gd)
## [1] 1.072523
# CLUSTERS ARE CONNECTED COMPONENTS
clusters(gd)$no
## [1] 44
# SORTED CLUSTERS BY SIZE
sort(clusters(gd)$csize,decreasing = TRUE)
##  [1] 20  6  5  5  3  3  2  2  2  2  2  2  2  2  2  1  1  1  1  1  1  1  1
## [24]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
# GLOBAL CLUSTERING COEFFICIENT (TRANSITIVITY) IS THE RATIO OF TRIANGLES AND CONNECTED TRIPLES
transitivity(gd)
## [1] 0
cut75 <- quantile(as.vector(dtm[dtm>0]),0.75)
gd_f <- filter(cutoff = cut75,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- as.undirected(gd_f)
transitivity(gd_f)
## [1] 0
# RELATIVE MAXIMAL CLUSTER SIZE (AS % OF NUMBER OF NODES) 
max(clusters(gd)$csize)/vcount(gd)
## [1] 0.2247191
# RELATIVE NUMBER OF ISOLATED NODES (AS % OF NUMBER OF NODES)  
sum(degree(gd) == 0)/vcount(gd)
## [1] 0
# PATH DISTRIBUTION: This shows the different lengths of shortest paths (geodesics) in our network. 
sh <- shortest.paths(graph = gd, 
                     mode = "all",
                     weights = E(gd)$weight)
is.na(sh) <- sapply(sh,is.infinite)
paths <- na.omit(as.vector(sh))
length(paths)
## [1] 569
summary(paths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    22.0    72.0   144.7   233.3   613.3
plot(sort(paths),
     xlab = "Path Index", 
     ylab = "Path Length", 
     main = "Paths (sorted by length)", 
     pch = 20,
     col = adjustcolor(rgb(1,0,1,1)))

histP1(paths,
     breaks = 50,
     col = adjustcolor(rgb(1,0,1,1)),
     xlab = "Path Length Values",
     main = "Path Length Distribution for g")

#
#
#
# CENTRALITY ANALYSIS OF THE NETWORK
#
#
#
#
#
# BETWEENNESS CENTRALITY: THE NUMBER OF GEODESICS GOING THROUGH A NODE
#
#
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
bc <- betweenness(graph = gd,
                  v = V(gd),
                  directed = TRUE,
                  weights = E(gd)$weight) 
plot(sort(bc, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Betweenness Centrality", 
     main = "Sorted VDC Network Betweenness Centrality Values", 
     pch = 19)

histP2(bc,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Betweenness Centrality Values",
       main = "VDC Network Betweenness Centrality Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO BETWEENNESS CENTRALITY
bc_int <- as.integer(round(bc,0))/5
for (k in 1:length(bc_int)){
  V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(bc_int))))[as.integer(bc_int[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, niter = 200, area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("top",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# FIND THE TOP 10% BETWEENNES NODES
top_bc <- bc[which(bc > quantile(bc,0.9))]
top_bc
##    Kathmandu Metropolitan Lalitpur Sub Metropolitan 
##                        27                         2 
##       Sangkhu Bajrayogini                   Mangkha 
##                         1                         2
# FIND THE TOP 5% BETWENNES NODES
top_bc <- bc[which(bc > quantile(bc,0.95))]
top_bc
##    Kathmandu Metropolitan Lalitpur Sub Metropolitan 
##                        27                         2 
##                   Mangkha 
##                         2
# FILTRATION AND BETWEENNESS CENTRALITY
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc


# BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 25%
cut25 <- quantile(as.vector(dtm[dtm>0]),0.25)
gd_f <- filter(cutoff = cut25,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                    vertex_colors = V(gd_f)$color,
                    vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                 vertex_colors = V(gd_f)$color,
                 vertex_names = V(gd_f)$name)
bc <- betweenness(graph = gd_f,
                  v = V(gd_f),
                  directed = TRUE,
                  weights = E(gd_f)$weight) 
bc_int <- as.integer(round(bc,0))
for (k in 1:length(bc_int)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(bc_int))))[as.integer(bc_int[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.4*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 25%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 25%) Weighted Nepal Displacement Geo-Network")
legend("top",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 50%
cut50 <- quantile(as.vector(dtm[dtm>0]),0.50)
gd_f <- filter(cutoff = cut50,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
bc <- betweenness(graph = gd_f,
                  v = V(gd_f),
                  directed = TRUE,
                  weights = E(gd_f)$weight) 
bc_int <- as.integer(round(bc,0))
for (k in 1:length(bc_int)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(bc_int))))[as.integer(bc_int[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.4*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 50%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 6,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 50%) Weighted Nepal Displacement Geo-Network")
legend("top",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 75%
cut75 <- quantile(as.vector(dtm[dtm>0]),0.75)
gd_f <- filter(cutoff = cut75,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
bc <- betweenness(graph = gd_f,
                  v = V(gd_f),
                  directed = TRUE,
                  weights = E(gd_f)$weight) 
bc_int <- as.integer(round(bc,0))
for (k in 1:length(bc_int)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(bc_int))))[as.integer(bc_int[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.4*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 75%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 1, 
     edge.width = 0.3*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.75,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Filtered (Threshold = 75%) Weighted Nepal Displacement Geo-Network")
legend("top",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

#
#
# EDGE-BETWEENNESS CENTRALITY: THE NUMBER OF GEODESICS GOING THROUGH AN EDGE
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
ec <- edge.betweenness(graph = gd,
                       e = E(gd),
                       directed = TRUE,
                       weights = E(gd)$weight)
plot(sort(ec, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Edge Betweenness Centrality", 
     main = "Sorted VDC Network Edge Betweenness Centrality Values", 
     pch = 19)

histP2(ec,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Edge Betweenness Centrality Values",
       main = "VDC Network Edge Betweenness Centrality Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO EDGE BETWEENNESS CENTRALITY
for (k in 1:length(ec)){
  E(gd)$color[k] <- rev(heat.colors(1+as.integer(max(ec))))[as.integer(ec[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, niter = 200, area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = E(gd)$color,
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Edge Betweenness Centrality","Lowest Edge Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = E(gd)$color,
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Highest Edge Betweenness Centrality","Lowest Edge Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# FILTRATION AND EDGE BETWEENNESS CENTRALITY
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc


# EDGE BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 25%
cut25 <- quantile(as.vector(dtm[dtm>0]),0.25)
gd_f <- filter(cutoff = cut25,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
ec <- edge.betweenness(graph = gd_f,
                       e = E(gd_f),
                       directed = TRUE,
                       weights = E(gd_f)$weight)
for (k in 1:length(ec)){
  E(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(ec))))[as.integer(ec[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 25%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 4,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 25%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# EDGE BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 50%
cut50 <- quantile(as.vector(dtm[dtm>0]),0.50)
gd_f <- filter(cutoff = cut50,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
ec <- edge.betweenness(graph = gd_f,
                       e = E(gd_f),
                       directed = TRUE,
                       weights = E(gd_f)$weight)
for (k in 1:length(ec)){
  E(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(ec))))[as.integer(ec[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 50%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 5,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 50%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Betweenness Centrality","Lowest Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

# EDGE BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 75%
cut75 <- quantile(as.vector(dtm[dtm>0]),0.75)
gd_f <- filter(cutoff = cut75,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
ec <- edge.betweenness(graph = gd_f,
                       e = E(gd_f),
                       directed = TRUE,
                       weights = E(gd_f)$weight)
for (k in 1:length(ec)){
  E(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(ec))))[as.integer(ec[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f, niter = 200, area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.75,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 75%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Edge Betweenness Centrality","Lowest Edge Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 6,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.95, 
     edge.width = 0.25*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = E(gd_f)$color,
     main = "Filtered (Threshold = 75%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Edge Betweenness Centrality","Lowest Edge Betweenness Centrality"),
       fill = c("red","White"),
       bty = "n")

#
#
# CLOSENES CENTRALITY: THIS MEASURE TAKES INTO ACCOUNT THE DISTRIBUTION OF DISTANCES
# TO OTHER NODES FROM A GIVEN NODE. IT IS DEFINED AS THE RCIPROCAL OF THE FARNESS OF A NODE.
# FARNESS OF A NODE IS DEFINED AS THE SUM OF ITS DISTANCES TO ALL OTHER NODES. CLOSENESS CAN BE
# REGARDED AS A MEASURE OF HOW LONG IT WILL TAKE TO SPREAD INFORMATION FROM A NODE TO ALL OTHER
# NODES.
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
# cl <- clusters(gd)
# gd <- induced.subgraph(gd, which(cl$membership == which.max(cl$csize)))
cc <- closeness(graph = gd,
                vids = V(gd),
                weights = E(gd)$weight)
cc <- 100*(cc/max(cc))^9
plot(sort(cc, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Closeness Centrality", 
     main = "Sorted VDC Network Closeness Centrality Values", 
     pch = 19)

histP2(cc,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Closeness Centrality Values",
       main = "VDC Network Closeness Centrality Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO CLOSENESS CENTRALITY
for (k in 1:length(cc)){
  V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(cc))))[as.integer(cc[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

# FILTRATION AND CLOSENESS CENTRALITY
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc


# CLOSENESS CENTRALITY AND FILTER AT CUTOFF = 25%
cut25 <- quantile(as.vector(dtm[dtm>0]),0.25)
gd_f <- filter(cutoff = cut25,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
cc <- closeness(graph = gd_f,
                vids = V(gd_f),
                weights = E(gd_f)$weight)
cc <- 100*(cc/max(cc))^9
for (k in 1:length(cc)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(cc))))[as.integer(cc[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.65,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 25%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 6,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 25%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

# CLOSENESS CENTRALITY AND FILTER AT CUTOFF = 50%
cut50 <- quantile(as.vector(dtm[dtm>0]),0.50)
gd_f <- filter(cutoff = cut50,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
cc <- closeness(graph = gd_f,
                vids = V(gd_f),
                weights = E(gd_f)$weight)
cc <- 100*(cc/max(cc))^9
for (k in 1:length(cc)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(cc))))[as.integer(cc[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.9, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 0.65,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 50%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 6,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 50%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

# EDGE BETWEENNESS CENTRALITY AND FILTER AT CUTOFF = 75%
cut75 <- quantile(as.vector(dtm[dtm>0]),0.75)
gd_f <- filter(cutoff = cut75,
               edge_matrix = dtm,
               vertex_colors = V(gd)$color,
               vertex_names = V(gd)$name)
gd_f <- drop_isolated(graph = gd_f,
                      vertex_colors = V(gd_f)$color,
                      vertex_names = V(gd_f)$name)
gd_f <- drop_loops(graph = gd_f,
                   vertex_colors = V(gd_f)$color,
                   vertex_names = V(gd_f)$name)
cc <- closeness(graph = gd_f,
                vids = V(gd_f),
                weights = E(gd_f)$weight)
cc <- 100*(cc/max(cc))^9
for (k in 1:length(cc)){
  V(gd_f)$color[k] <- rev(heat.colors(1+as.integer(max(cc))))[as.integer(cc[k])+1]
}
gd_f_coords <- koords[which(V(gd)$name %in% V(gd_f)$name),]
plot(gd_f,
     layout = layout.fruchterman.reingold(gd_f,
                                          niter = 200,
                                          area = 2000*vcount(gd_f)),
     vertex.color = V(gd_f)$color,
     vertex.size = 12,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 1.3, 
     edge.width = 0.3*sqrt(E(gd_f)$weight),
     edge.curved = TRUE,
     edge.arrow.size = 1,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 75%) Weighted Abstract Nepal Displacement Network")
legend("topleft",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd_f,
     layout = gd_f_coords,
     vertex.color = V(gd_f)$color,
     vertex.size = 8,
     vertex.label = V(gd_f)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 1, 
     edge.width = 0.3*sqrt(E(gd_f)$weight),
     edge.arrow.size = 0.75,
     edge.curved = TRUE,
     edge.color = grey.colors(1),
     main = "Filtered (Threshold = 75%) Weighted Nepal Displacement Geo-Network")
legend("topright",
       c("Highest Closeness Centrality","Lowest Closeness Centrality"),
       fill = c("red","White"),
       bty = "n")

#
#
#
# EIGENVECTOR CENTRALITY: THIS IS A MEASURE OF THE INFLUENCE OF A NODE IN A NETWORK.
# IT ASSIGNS RELATIVE SCORES TO ALL NODES BASED ON THE CONCEPT THAT CONNECTIONS
# TO HIGH-SCORING NODES CONTRIBUTE MORE TO THE SCORE OF A GIVEN NODE THAN CONNECTIONS
# TO LOW-SCORING NODES. A VARIANT OF EIGNEVECTOR CENTRALITY IS GOOGLE'S PAGERANK.
#
#
# NOTE: THIS IS LIKELY NOT THE RIGHT TOOL TO APPLY HERE aT THE VDC LEVEL
# BUT IF WE OBTAIN MORE GRANUALR DATA, WE WILL BE ABLE TO USE IT
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
ec <- evcent(gd)$vector
ec <- 10*ec
plot(sort(ec, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Eigenvector Centrality", 
     main = "Sorted VDC Network Eigenvector Centrality Values", 
     pch = 19)

histP2(ec,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Eigenvector Centrality Values",
       main = "VDC Network Eigenvector Centrality Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO EIGENVECTOR CENTRALITY
for (k in 1:length(ec)){
  V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(ec))))[as.integer(ec[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Eigenvector Centrality","Lowest Eigenvector Centrality"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Highest Eigenvector Centrality","Lowest Eigenvector Centrality"),
       fill = c("red","White"),
       bty = "n")

#
#
#
# AUTHORITY SCORE: THIS IS A MEASURE FOR DIRECTED NETWORKS, THE NUMBER OF NODES THAT ARE
# HUBS AND POINT TO A GIVEN NODE. IT IS DEFIEND AS THE PRINCIPAL EIGENVECTOR FOR t(A)*A, 
# WHERE A SANDS FOR THE ADJACENCY MATRIX OF THE NETWORK. FOR UNDIRECTED NETWORKS, 
# AUTHORITY SCORE IS EQUAL TO THE HUB SCORE
#
# NOTE: THIS IS LIKELY NOT THE RIGHT TOOL TO APPLY HERE aT THE VDC LEVEL
# BUT IF WE OBTAIN MORE GRANUALR DATA, WE WILL BE ABLE TO USE IT
#

gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
au <- authority.score(gd)$vector
au <- 100*au
plot(sort(au, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Eigenvector Centrality", 
     main = "Sorted VDC Network Authority Score Values", 
     pch = 19)

histP2(au,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Authority Score Values",
       main = "VDC Network Authority Score Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO AUTHORITY SCORE
for (k in 1:length(au)){
  V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(au))))[as.integer(au[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Authority Score","Lowest Authority Score"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Highest Authority Score","Lowest Authority Score"),
       fill = c("red","White"),
       bty = "n")

#
#
#
# HUB SCORE: MEASURE THE NUMBER OF AUTHORITY NODES THAT A GIVEN
# HUB NODE POINTS TO
#
# NOTE: THIS IS LIKELY NOT THE RIGHT TOOL TO APPLY HERE aT THE VDC LEVEL
# BUT IF WE OBTAIN MORE GRANUALR DATA, WE WILL BE ABLE TO USE IT
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
hb <- hub.score(gd)$vector
hb <- 100*hb
plot(sort(hb, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Eigenvector Centrality", 
     main = "Sorted VDC Network Hub Score Values", 
     pch = 19)

histP2(hb,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Hub Score Values",
       main = "VDC Network Hub Score Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO AUTHORITY SCORE
for (k in 1:length(hb)){
  V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(hb))))[as.integer(hb[k])+1]
}
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Flow (VDC Level)")
legend("topleft",
       c("Highest Hub Score","Lowest Hub Score"),
       fill = c("red","White"),
       bty = "n")

plot(gd,
     layout = gd_coords,
     vertex.color = V(gd)$color,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
legend("topright",
       c("Highest Hub Score","Lowest Hub Score"),
       fill = c("red","White"),
       bty = "n")

#
#
#
# CLUSTERING COEFFICIENT: THIS MEASURES THE PROBABILITY THAT TWO 
# VERTICES ARE CONNECTED
#
# NOTE: SINC ENO CLSOED TRIPLETS ARE PRESENT, WE GET 0 OR NA VALUES ONLY
# THIS IS LIKELY NOT THE RIGHT TOOL TO APPLY HERE aT THE VDC LEVEL
# BUT IF WE OBTAIN MORE GRANUALR DATA, WE WILL BE ABLE TO USE IT
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
tr <- transitivity(graph = gd,type = "local")
plot(sort(hb, decreasing = TRUE),
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index", 
     ylab = "Eigenvector Centrality", 
     main = "Sorted VDC Network Hub Score Values", 
     pch = 19)

histP2(hb,
       breaks = 50,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Hub Score Values",
       main = "VDC Network Hub Score Distribution")

# PLOT HEAT MAP ON VERTICES ACCORDING TO AUTHORITY SCORE
# for (k in 1:length(hb)){
#   V(gd)$color[k] <- rev(heat.colors(1+as.integer(max(hb))))[as.integer(hb[k])+1]
# }
# gd_coords <- koords[which(vdc %in% V(gd)$name),]
# plot(gd,
#      layout = layout.fruchterman.reingold(gd, 
#                                           niter = 200, 
#                                           area = 2000*vcount(gd)),
#      vertex.color = V(gd)$color,
#      vertex.size = 9,
#      vertex.label = V(gd)$name, 
#      vertex.label.color = "black",
#      vertex.label.font = 1, 
#      vertex.label.cex = 0.85, 
#      edge.width = 0.2*sqrt(E(gd)$weight),
#      edge.arrow.size = 0.5,
#      edge.curved = TRUE,
#      edge.color = gray.colors(1),
#      main = "Weighted Nepal Displacement Network Flow (VDC Level)")
# legend("topleft",
#        c("Highest Hub Score","Lowest Hub Score"),
#        fill = c("red","White"),
#        bty = "n")
# plot(gd,
#      layout = gd_coords,
#      vertex.color = V(gd)$color,
#      vertex.size = 4,
#      vertex.label = NA, 
#      vertex.label.color = "black",
#      vertex.label.font = 1, 
#      vertex.label.cex = 0.75, 
#      edge.width = 0.2*sqrt(E(gd)$weight),
#      edge.arrow.size = 0.5,
#      edge.curved = TRUE,
#      edge.color = gray.colors(1),
#      main = "Weighted Nepal Displacement Geo-Network Flow (VDC Level)")
# legend("topright",
#        c("Highest Hub Score","Lowest Hub Score"),
#        fill = c("red","White"),
#        bty = "n")
#
#
#
#
# COMMUNITY STRUCTURES FOR THE DISPLACEMENT NETWORK
#
#
#
# EDGE-BETWEENNESS COMMUNITY
# The edge betweenness score of an edge measures the number of shortest paths through it, see edge.betweenness for details. 
# The idea of the edge betweenness based community structure detection is that it is likely that edges connecting separate modules 
# have high edge betweenness as all the shortest paths from one module to another must traverse through them. 
# So if we gradually remove the edge with the highest edge betweenness score we will get a hierarchical map, a rooted tree, 
# called a dendrogram of the graph. The leafs of the tree are the individual vertices and the root of the tree represents the whole graph.
# edge.betweenness.community performs this algorithm by calculating the edge betweenness of the graph, 
# removing the edge with the highest edge betweenness score, then recalculating edge betweenness of the edges 
# and again removing the one with the highest score, etc.
#
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
ebc <- edge.betweenness.community(graph = gd,
                                  weights = E(gd)$weight,
                                  directed = TRUE)
gd_coords <- koords[which(vdc %in% V(gd)$name),]
plot(ebc,
     gd, 
     layout = gd_coords,
     vertex.size = 4, 
     vertex.label = NA,
     vertex.label.color = "black", 
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.1*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
main = "Weighted Nepal Displacement Geo-Network Communities (EB)")

plot(ebc,
     gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = V(gd)$color,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Communities (EB)")

# SOME BASIC EDGE-BETWEENNESS COMMUNITY STATS
plot(sort(ebc$membership), 
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index (VDC) in the Network", 
     ylab = "Edge-Betweenness Community Values", 
     main = "Sorted Edge-Betweenness Community Values for Nepal Displacement Network", 
     pch = 19)

histP1(ebc$membership,
       breaks = 60,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Edge-Betweenness Community Values",
       main = "Edge-Betweenness Community Distribution for Nepal Displacement Network")

#
#
#
# SIMPLEST CLUSTERING/COMMUNITY DETECTION
#
# This clustering is based on the connectivity of the graph.
# In a way, the msot basic one, but, as it can be observed, 
# not necessarily the most informative or functional as
# there are occasional conenctions that ar enonessential
# but would have a significant imapct for this method.
#
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
gd_coords <- koords[which(vdc %in% V(gd)$name),]
strongclusters <- clusters(gd)$membership
plot(gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = strongclusters,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Clusters")

plot(gd,
     layout = gd_coords,
     vertex.color = strongclusters,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Flow Clusters")

# SOME BASIC CLUSTERING COMMUNITY STATS
plot(sort(clusters(gd)$membership), 
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index (VDC) in the Network", 
     ylab = "Clustering Community Values", 
     main = "Sorted Clustering Community Values for Nepal Displacement Network", 
     pch = 19)

histP1(clusters(gd)$membership,
       breaks = 60,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Clustering Community Values",
       main = "Clustering Community Distribution for Nepal Displacement Network")

#
#
#
#
# MULTILEVEL COMMUNITY DETECTION
#
# NOTE: THIS APPLEIS TO UNDIRECTED GRAPHS ONLY
#
# Multilevel community detection is based on the following approach. 
# Assume that we start with a weighted network of N nodes. 
# First, we assign a different community to each node of the network. 
# So, in this initial partition there are as many communities as there are nodes. 
# Then, for each node i we consider the neighbours j of i and we evaluate 
# the gain of modularity that would take place by removing i from its community 
# and by placing it in the community of j. The node i is then placed in the community 
# for which this gain is maximum (in case of a tie we use a breaking rule), 
# but only if this gain is positive. If no positive gain is possible, 
# i stays in its original community. This process is applied repeatedly 
# and sequentially for all nodes until no further improvement can be achieved. 
# Note that a node may be, and often is, considered several times. Also, 
# the output of the algorithm depends on the order in which the nodes 
# are considered, although it can be shown that the order has little effect.
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
gd_coords <- koords[which(vdc %in% V(gd)$name),]
gd <- as.undirected(gd)
mc <- multilevel.community(graph = gd,
                           weights = E(gd)$weights)
plot(mc,
     gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = mc,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.3*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Multilevel Communities")

plot(mc,
     gd,
     layout = gd_coords,
     vertex.color = strongclusters,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Multilevel Communities")

# SOME BASIC MULTILEVEL COMMUNITY STATS
plot(sort(mc$membership), 
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index (VDC) in the Network", 
     ylab = "Multilevel Community Values", 
     main = "Sorted Multilevel Community Values for Nepal Displacement Network", 
     pch = 19)

histP1(mc$membership,
     breaks = 60,
     col = adjustcolor(rgb(1,0,1,1)),
     xlab = "Multilevel Community Values",
     main = "Multilevel Community Distribution for Nepal Displacement Network")

#
#
#
# WALKTRAP COMMUNITY DETECTION
#
#
# Walktrap community detection aims to find 
# densely connected subgraphs using random walks with the 
# premise that short random walks should be contained within the same cluster.
#
#
#
gd <- graph.adjacency(dtm,
                      mode = "directed",
                      weighted = TRUE)
V(gd)$color <- rep("SkyBlue2",length(vdc))
for (k in 1:length(vdc)){
  o_count <- length(which(dt_data$idp_origin_vdc == vdc[k]))+
    length(which(dt_data$idp2_origin_vdc == vdc[k]))
  d_count <- length(which(dt_data$vdc == vdc[k]))
  if(o_count>d_count){
    V(gd)$color[k] <- "green"
  } 
}
V(gd)$name <- vdc
gd <- drop_isolated(graph = gd,
                    vertex_colors = V(gd)$color,
                    vertex_names = V(gd)$name)
gd <- drop_loops(graph = gd,
                 vertex_colors = V(gd)$color,
                 vertex_names = V(gd)$name)
gd_coords <- koords[which(vdc %in% V(gd)$name),]
gd <- as.undirected(gd)
wc <- walktrap.community(graph = gd,
                           weights = E(gd)$weights)
plot(wc,
     gd,
     layout = layout.fruchterman.reingold(gd, 
                                          niter = 200, 
                                          area = 2000*vcount(gd)),
     vertex.color = mc,
     vertex.size = 9,
     vertex.label = V(gd)$name, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.85, 
     edge.width = 0.3*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Network Walktrap Communities")

plot(wc,
     gd,
     layout = gd_coords,
     vertex.color = strongclusters,
     vertex.size = 4,
     vertex.label = NA, 
     vertex.label.color = "black",
     vertex.label.font = 1, 
     vertex.label.cex = 0.75, 
     edge.width = 0.2*sqrt(E(gd)$weight),
     edge.arrow.size = 0.5,
     edge.curved = TRUE,
     edge.color = gray.colors(1),
     main = "Weighted Nepal Displacement Geo-Network Walktrap Communities")

# SOME BASIC WALKTRAP STATS
plot(sort(wc$membership), 
     col = adjustcolor(rgb(1,0,1,1)), 
     xlab = "Node Index (VDC) in the Network", 
     ylab = "Walktrap Community Values", 
     main = "Sorted Walktrap Community Values for Nepal Displacement Network", 
     pch = 19)

histP1(wc$membership,
       breaks = 60,
       col = adjustcolor(rgb(1,0,1,1)),
       xlab = "Walktrap Community Values",
       main = "Walktrap Community Distribution for Nepal Displacement Network")